from wordcloud import STOPWORDS
from collections import Counter
import jieba
from sql_read import message_read


# Project-specific words that are too generic to be informative in the word
# frequency output; they are merged with wordcloud's STOPWORDS and with the
# contents of the stop-list file.
_EXTRA_STOPWORDS = frozenset({
    '年月日', '没有', '我们', '现在', '这个', '一个', '就是',
    'rdquo', 'ldquo', 'hellip', 'nbsp',
    '这样', '可以', '本人', '但是', '为什么', '已经', '他们', '市长',
    '你好', '小区', '不能', '一直', '如果', '领导', '因为', '办理',
    '尊敬', '很多', '我家', '芜湖', '芜湖市', '自己', '进行', '问题',
    '作为', '要求', '由于', '所以', '时间', '知道', '情况', '需要',
    '希望', '您好', '这些', '当时', '无法', '谢谢', '处理', '还是',
    '不是', '反映', '多次', '通过', '应该', '任何', '而且', '什么',
    '开始', '解决', '至今', '你们', '可是', '难道', '政府', '目前',
    '今年', '回复', '这种', '时候', '得到', '人员', '地方', '一名',
    '为了', '真的', '相关', '怎么', '能够', '一下', '存在', '部门',
    '恳请', '月份', '答复', '反应', '严重', '不了', '给予', '结果',
    '还有', '是否', '有关', '工作人员', '按照', '市政府',
})


def _load_stopwords(path):
    """Return STOPWORDS, the extra words and every line of *path* as one set."""
    # The context manager closes the file even if reading fails.
    with open(path, "r", encoding="utf-8") as stop_file:
        file_words = {line.strip("\n") for line in stop_file}
    return set(STOPWORDS) | _EXTRA_STOPWORDS | file_words


def fen_ci(sign, stoplist_path="./stoplist.txt", top_n=1500):
    """Segment the messages selected by *sign* and return word frequencies.

    The text returned by ``message_read(sign)`` is split with ``jieba.lcut``.
    Tokens shorter than two characters and tokens found in the stop-word set
    are discarded; the remaining tokens are counted.

    Args:
        sign: Selector passed unchanged to ``message_read``.
            # NOTE(review): presumably a pattern selecting which messages to
            # load - confirm against sql_read.message_read.
        stoplist_path: UTF-8 text file with one stop word per line.
        top_n: Maximum number of most frequent words to return.

    Returns:
        A list of ``{'name': word, 'value': count}`` dicts ordered from the
        most to the least frequent word. The list is also printed to stdout.

    Raises:
        OSError: If the stop-list file cannot be opened.
    """
    message_str = message_read(sign)
    tokens = jieba.lcut(message_str)  # jieba.lcut returns a list of tokens

    stopwords = _load_stopwords(stoplist_path)

    # Single-character (and empty) tokens carry no meaning here, so they are
    # dropped together with the stop words.
    counter = Counter(
        token for token in tokens
        if len(token) > 1 and token not in stopwords
    )

    datas = [
        {'name': word, 'value': count}
        for word, count in counter.most_common(top_n)
    ]
    print(datas)
    return datas


# Runs the word-frequency analysis as a top-level statement, so it executes
# (and prints its result) whenever this module is run or imported.
# NOTE(review): '2015%' looks like a SQL LIKE pattern selecting 2015 records -
# confirm against sql_read.message_read.
fen_ci('2015%')
